1 Neighbourhood boundaries

1.1 BCC suburbs

# orig data missing???
SUB <- read_csv("data-raw/geo/suburb-and-adjoining-suburb-november-2019.zip") %>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  select(suburb_name) %>% 
  rename(SSC_NAME16 = suburb_name) %>%  
  distinct() %>% 
  mutate(SSC_NAME16 = str_to_title(SSC_NAME16)) %>% 
  arrange(SSC_NAME16)

write_rds(SUB, "data/geo/clean/SUB.Rds")

Full ( hopefully ;) list of Brisbane suburbs. Top 5 alphabetically:

SUB <- read_rds("data/geo/SUB.Rds")

# glimpse(SUB)

SUB %>% 
  slice(1:5) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
SSC_NAME16
Acacia Ridge
Albion
Alderley
Algester
Annerley

This might very well include areas with no pops (and therefore no SEIFA), for instance:

SUB %>% 
  filter(str_detect(SSC_NAME16, 
                    regex("port", ignore_case = TRUE))) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
SSC_NAME16
Brisbane Airport
Port Of Brisbane

1.2 ABS

Names from Brisbane containing (Brisbane - Qld), names from Qld containing (Qld) have to be cleaned to match BCC data.

Mcdowall is called McDowall and Mount Coot-tha is Mount Coot-Tha - these have been unifeied as well.

unzip("data-raw/geo/1270055003_ssc_2016_aust_shape.zip", 
      exdir = "data-raw/geo")

SSC <- st_read("data-raw/geo/1270055003_ssc_2016_aust_shape/SSC_2016_AUST.shp", 
               stringsAsFactors = FALSE) %>% 
  mutate(SSC_CODE16 = as.integer(SSC_CODE16)) %>% 
  select(-STE_NAME16, -STE_CODE16, -AREASQKM16) %>%     
  st_transform(3112) %>%    
  filter(!st_is_empty(geometry)) %>% 
  mutate(SSC_NAME16_orig = SSC_NAME16) %>%  
  mutate(SSC_NAME16 = str_remove(SSC_NAME16, 
                                 fixed(" (Brisbane - Qld)"))) %>% 
  mutate(SSC_NAME16 = str_remove(SSC_NAME16, 
                                 fixed(" (Qld)"))) %>% 
  mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "McDowall",
                             "Mcdowall", SSC_NAME16)) %>% 
  mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "Mount Coot-tha",
                             "Mount Coot-Tha", SSC_NAME16))
## Reading layer `SSC_2016_AUST' from data source `C:\external\FUN_BCC-animals\data-raw\geo\1270055003_ssc_2016_aust_shape\SSC_2016_AUST.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3264 features and 5 fields (with 2 geometries empty)
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 137.9943 ymin: -29.1779 xmax: 153.5522 ymax: -9.142176
## Geodetic CRS:  GDA94
# SSC <- ms_simplify(SSC, keep = 0.05, weighting = 0.7) # default settings

write_rds(SSC, "data/geo/SSC_2016_AUST.Rds")

unlink("data-raw/geo/1270055003_ssc_2016_aust_shape", recursive = TRUE)

Areas without matches using original names

SUB %>% 
  left_join(SSC) %>% 
  select(-geometry) %>% 
  filter(SSC_NAME16 != SSC_NAME16_orig) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
SSC_NAME16 SSC_CODE16 SSC_NAME16_orig
Albion 30024 Albion (Brisbane - Qld)
Ascot 30089 Ascot (Brisbane - Qld)
Bald Hills 30123 Bald Hills (Qld)
Balmoral 30132 Balmoral (Qld)
Belmont 30226 Belmont (Qld)
Brighton 30375 Brighton (Qld)
Brookfield 30388 Brookfield (Qld)
Carina 30542 Carina (Qld)
Chandler 30583 Chandler (Qld)
Chapel Hill 30584 Chapel Hill (Qld)
Durack 30913 Durack (Qld)
Fairfield 31025 Fairfield (Qld)
Grange 31236 Grange (Qld)
Hamilton 31301 Hamilton (Qld)
Jindalee 31459 Jindalee (Qld)
Kangaroo Point 31496 Kangaroo Point (Qld)
Kooringal 31586 Kooringal (Qld)
Larapinta 31654 Larapinta (Qld)
Macgregor 31736 Macgregor (Qld)
Mackenzie 31741 Mackenzie (Brisbane - Qld)
Manly 31764 Manly (Qld)
Mansfield 31768 Mansfield (Qld)
Mcdowall 31806 McDowall
Middle Park 31839 Middle Park (Qld)
Milton 31864 Milton (Qld)
Mount Coot-Tha 31969 Mount Coot-tha
Newstead 32156 Newstead (Qld)
Northgate 32207 Northgate (Qld)
Oxley 32262 Oxley (Qld)
Paddington 32269 Paddington (Qld)
Red Hill 32424 Red Hill (Brisbane - Qld)
Richlands 32447 Richlands (Qld)
Robertson 32467 Robertson (Qld)
Rocklea 32474 Rocklea (Qld)
Salisbury 32527 Salisbury (Qld)
Sandgate 32532 Sandgate (Qld)
Seven Hills 32562 Seven Hills (Qld)
Sherwood 32579 Sherwood (Qld)
Spring Hill 32648 Spring Hill (Qld)
Tennyson 32798 Tennyson (Qld)
The Gap 32817 The Gap (Brisbane - Qld)
Virginia 32981 Virginia (Qld)
West End 33063 West End (Brisbane - Qld)
Windsor 33126 Windsor (Qld)
Wishart 33132 Wishart (Qld)

Stones Corner doesn’t exist in ABS but it does in BCC. It seems it’s part of Greenslopes.

SUB %>% 
  left_join(SSC) %>% 
  select(-geometry) %>% 
  filter(is.na(SSC_CODE16))
## # A tibble: 1 x 3
##   SSC_NAME16    SSC_CODE16 SSC_NAME16_orig
##   <chr>              <int> <chr>          
## 1 Stones Corner         NA <NA>

Full map

SSC %<>% 
  right_join(SUB) %>%
  filter(SSC_NAME16 != "Stones Corner")

# SSC %>% 
#   plot(max.plot = 1)

qtm(SSC, fill = NULL, borders = "darkorchid4", 
    text ="SSC_NAME16", text.col = "darkorchid4")

2 SEIFA

SEIFA <- read_xls("data-raw/SEIFA/2033055001 - ssc indexes.xls", 
                  sheet = "Table 1", skip = 5, n_max = 13719, na = "-") %>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  dplyr::rename(SSC_CODE16 = x1,
                SSC_NAME16 = x2,
                IRSD = score_3,
                IRSD_d = decile_4,
                IRSAD = score_5,
                IRSAD_d = decile_6,
                IER = score_7,
                IER_d = decile_8,
                IEO = score_9,
                IEO_d = decile_10,
                URP = x11,
                caution = x12) %>% 
  mutate(SSC_CODE16 = as.integer(SSC_CODE16),
         IRSD = as.integer(IRSD),
         IRSAD = as.integer(IRSAD),
         IER = as.integer(IER),
         IEO = as.integer(IEO),
         IRSD_d = as.integer(IRSD_d),
         IRSAD_d = as.integer(IRSAD_d),
         IER_d = as.integer(IER_d),
         IEO_d = as.integer(IEO_d),
         URP = as.integer(URP)
  ) %>% 
  mutate(caution = as.logical(ifelse(is.na(caution), "False", "True")))

write_rds(SEIFA, "data/SEIFA/SEIFA.Rds")

2.1 Coverage

2.1.1 Data for Australia, example of IRSD.

SEIFA %<>% 
  select(-SSC_NAME16)

frq(SEIFA$IRSD_d)
## 
## x <integer>
## # total N=13713  valid N=13691  mean=5.50  sd=2.87
## 
## Value |    N | Raw % | Valid % | Cum. %
## ---------------------------------------
##     1 | 1369 |  9.98 |   10.00 |  10.00
##     2 | 1369 |  9.98 |   10.00 |  20.00
##     3 | 1370 |  9.99 |   10.01 |  30.01
##     4 | 1370 |  9.99 |   10.01 |  40.01
##     5 | 1367 |  9.97 |    9.98 |  50.00
##     6 | 1370 |  9.99 |   10.01 |  60.00
##     7 | 1369 |  9.98 |   10.00 |  70.00
##     8 | 1369 |  9.98 |   10.00 |  80.00
##     9 | 1371 | 10.00 |   10.01 |  90.02
##    10 | 1367 |  9.97 |    9.98 | 100.00
##  <NA> |   22 |  0.16 |    <NA> |   <NA>
SEIFA %>% 
  ggplot(aes(x = IRSD_d)) + 
  geom_bar() 

SEIFA %>% 
  ggplot(aes(x = as.factor(IRSD_d), y = IRSD)) + 
  geom_boxplot(varwidth = TRUE) 

2.1.2 Brisbane suburbs only

SSC %<>% 
  left_join(SEIFA) 

# SSC %>% 
#   st_drop_geometry() %>% 
#   glimpse()

frq(SSC$IRSD_d)
## 
## x <integer>
## # total N=193  valid N=184  mean=7.79  sd=2.37
## 
## Value |  N | Raw % | Valid % | Cum. %
## -------------------------------------
##     1 |  3 |  1.55 |    1.63 |   1.63
##     2 |  6 |  3.11 |    3.26 |   4.89
##     3 |  5 |  2.59 |    2.72 |   7.61
##     4 |  5 |  2.59 |    2.72 |  10.33
##     5 | 13 |  6.74 |    7.07 |  17.39
##     6 | 12 |  6.22 |    6.52 |  23.91
##     7 | 28 | 14.51 |   15.22 |  39.13
##     8 | 18 |  9.33 |    9.78 |  48.91
##     9 | 33 | 17.10 |   17.93 |  66.85
##    10 | 61 | 31.61 |   33.15 | 100.00
##  <NA> |  9 |  4.66 |    <NA> |   <NA>
SSC %>% 
  ggplot(aes(x = IRSD_d)) + 
  geom_bar() 

SSC %>% 
  filter(!is.na(IRSD_d)) %>% 
  ggplot(aes(x = as.factor(IRSD_d), y = IRSD)) + 
  geom_boxplot(varwidth = TRUE) 

2.2 Missing

Few areas with missing SEIFA

SSC %>% 
  st_drop_geometry() %>% 
  filter_at(vars(ends_with("_d")), 
            any_vars(is.na(.))) %>%   
  select(SSC_NAME16, ends_with("_d"), URP)
##           SSC_NAME16 IRSD_d IRSAD_d IER_d IEO_d URP
## 1        Banks Creek     NA      NA    NA    NA  NA
## 2   Brisbane Airport     NA      NA    NA    NA  NA
## 3         Eagle Farm     NA      NA    NA    NA  NA
## 4 Enoggera Reservoir     NA      NA    NA    10  25
## 5          Karawatha     NA      NA    NA    NA  NA
## 6          Larapinta     NA      NA    NA    NA  NA
## 7             Lytton     NA      NA    NA    NA  NA
## 8     Mount Coot-Tha     NA      NA    NA    NA  NA
## 9   Port Of Brisbane     NA      NA    NA    NA  NA

These were excluded.

SSC %<>% 
  filter_at(vars(ends_with("_d")), all_vars(!is.na(.))) 

2.3 Caution

Few cases with ABS flag caution.

frq(SSC$caution)
## 
## x <lgl>
## # total N=184  valid N=184  mean=0.02  sd=0.15
## 
## Value |   N | Raw % | Valid % | Cum. %
## --------------------------------------
## FALSE | 180 | 97.83 |   97.83 |  97.83
## TRUE  |   4 |  2.17 |    2.17 | 100.00
## <NA>  |   0 |  0.00 |    <NA> |   <NA>

Usually with very small pop numbers.

SSC %>% 
  st_drop_geometry() %>% 
  group_by(caution) %>% 
  summarise(mean = mean(URP),
            min = min(URP),
            max = max(URP))
## # A tibble: 2 x 4
##   caution   mean   min   max
##   <lgl>    <dbl> <int> <int>
## 1 FALSE   6295.     23 22904
## 2 TRUE      38.8    28    49
SSC %>% 
  st_drop_geometry() %>% 
  filter(caution) %>% 
  select(-caution, -SSC_CODE16, -SSC_NAME16_orig)
##      SSC_NAME16 IRSD IRSD_d IRSAD IRSAD_d  IER IER_d  IEO IEO_d URP
## 1        Bulwer 1014      6   996       6 1012     5 1024     7  49
## 2   Cowan Cowan 1014      6   996       6 1012     5 1024     7  28
## 3 England Creek 1004      5   978       5 1063     8  949     3  33
## 4     Kooringal 1014      6   996       6 1012     5 1024     7  45

These remain included.

2.4 ‘Local’ deciles

Original values of indices were used to calculate ‘local deciles’ using SSCs for Brisbane only

SSC %<>% 
  mutate(IRSD_d_orig = IRSD_d) %>% 
  mutate(IRSD_d = ntile(IRSD, 10)) %>%
  mutate(IRSAD_d_orig = IRSAD_d) %>% 
  mutate(IRSAD_d = ntile(IRSAD, 10)) %>%
  mutate(IER_d_orig = IER_d) %>% 
  mutate(IER_d = ntile(IER, 10)) %>%
  mutate(IEO_d_orig = IEO_d) %>% 
  mutate(IEO_d = ntile(IEO, 10))

write_rds(SSC, "data/geo/SSC.Rds")

2.4.1 IRSD_d

# frq(SSC$IRSD_d_orig)
# frq(SSC$IRSD_d)

SSC %>% 
  st_drop_geometry() %>% 
  tabyl(IRSD_d_orig, IRSD_d)
##  IRSD_d_orig 1 10  2  3  4  5  6  7  8  9
##            1 3  0  0  0  0  0  0  0  0  0
##            2 6  0  0  0  0  0  0  0  0  0
##            3 5  0  0  0  0  0  0  0  0  0
##            4 5  0  0  0  0  0  0  0  0  0
##            5 0  0 13  0  0  0  0  0  0  0
##            6 0  0  6  6  0  0  0  0  0  0
##            7 0  0  0 13 15  0  0  0  0  0
##            8 0  0  0  0  4 14  0  0  0  0
##            9 0  0  0  0  0  4 18 11  0  0
##           10 0 18  0  0  0  0  0  7 18 18
plot_xtab(SSC$IRSD_d, SSC$IRSD_d_orig, 
          margin = "row", bar.pos = "stack",
          show.summary = TRUE, coord.flip = TRUE)

tabz <- table(SSC$IRSD_d, SSC$IRSD_d_orig)
assocplot(tabz,
          xlab = "IRSD_d", ylab = "IRSD_d_orig")

mosaicplot(tabz,
           xlab = "IRSD_d", ylab = "IRSD_d_orig")

tm_shape(SSC) +
  tm_polygons(col = "IRSD_d", n = 10, palette = "div",
              id = "SSC_NAME16", 
              popup.vars = c("SSC_NAME16", "IRSD_d", "IRSD"))

2.4.2 IRSAD_d

# frq(SSC$IRSAD_d_orig)
# frq(SSC$IRSAD_d)

SSC %>% 
  st_drop_geometry() %>% 
  tabyl(IRSAD_d_orig, IRSAD_d)
##  IRSAD_d_orig 1 10  2  3  4  5  6  7  8  9
##             1 3  0  0  0  0  0  0  0  0  0
##             2 4  0  0  0  0  0  0  0  0  0
##             3 4  0  0  0  0  0  0  0  0  0
##             4 2  0  0  0  0  0  0  0  0  0
##             5 6  0  2  0  0  0  0  0  0  0
##             6 0  0 11  0  0  0  0  0  0  0
##             7 0  0  6  5  0  0  0  0  0  0
##             8 0  0  0 14  7  0  0  0  0  0
##             9 0  0  0  0 12 18  5  0  0  0
##            10 0 18  0  0  0  0 13 18 18 18
plot_xtab(SSC$IRSAD_d, SSC$IRSAD_d_orig, 
          margin = "row", bar.pos = "stack",
          show.summary = TRUE, coord.flip = TRUE)

tabz <- table(SSC$IRSAD_d, SSC$IRSAD_d_orig)
assocplot(tabz,
          xlab = "IRSAD_d", ylab = "IRSAD_d_orig")

mosaicplot(tabz,
           xlab = "IRSAD_d", ylab = "IRSAD_d_orig")

tm_shape(SSC) +
  tm_polygons(col = "IRSAD_d", n = 10, palette = "div",
              id = "SSC_NAME16", 
              popup.vars = c("SSC_NAME16", "IRSAD_d", "IRSAD"))

2.4.3 IER_d

# frq(SSC$IER_d_orig)
# frq(SSC$IER_d)

SSC %>% 
  st_drop_geometry() %>% 
  tabyl(IER_d_orig, IER_d)
##  IER_d_orig  1 10  2  3  4  5  6 7  8  9
##           1 19  0  1  0  0  0  0 0  0  0
##           2  0  0 18  4  0  0  0 0  0  0
##           3  0  0  0 15  3  0  0 0  0  0
##           4  0  0  0  0 16  2  0 0  0  0
##           5  0  0  0  0  0 16  3 0  0  0
##           6  0  0  0  0  0  0 15 5  0  0
##           7  0  0  0  0  0  0  0 6  0  0
##           8  0  0  0  0  0  0  0 7  5  0
##           9  0  0  0  0  0  0  0 0 13  3
##          10  0 18  0  0  0  0  0 0  0 15
plot_xtab(SSC$IER_d, SSC$IER_d_orig, 
          margin = "row", bar.pos = "stack",
          show.summary = TRUE, coord.flip = TRUE)

tabz <- table(SSC$IER_d, SSC$IER_d_orig)
assocplot(tabz,
          xlab = "IER_d", ylab = "IER_d_orig")

mosaicplot(tabz,
           xlab = "IER_d", ylab = "IER_d_orig")

tm_shape(SSC) +
  tm_polygons(col = "IER_d", n = 10, palette = "div",
              id = "SSC_NAME16", 
              popup.vars = c("SSC_NAME16", "IER_d", "IER"))

2.4.4 IEO_d

# frq(SSC$IEO_d_orig)
# frq(SSC$IEO_d)

SSC %>% 
  st_drop_geometry() %>% 
  tabyl(IEO_d_orig, IEO_d)
##  IEO_d_orig 1 10  2  3  4  5  6  7  8  9
##           1 4  0  0  0  0  0  0  0  0  0
##           2 3  0  0  0  0  0  0  0  0  0
##           3 3  0  0  0  0  0  0  0  0  0
##           4 9  0  0  0  0  0  0  0  0  0
##           5 0  0  5  0  0  0  0  0  0  0
##           6 0  0  4  0  0  0  0  0  0  0
##           7 0  0 10  7  0  0  0  0  0  0
##           8 0  0  0 12  7  0  0  0  0  0
##           9 0  0  0  0 12 18  3  0  0  0
##          10 0 18  0  0  0  0 15 18 18 18
plot_xtab(SSC$IEO_d, SSC$IEO_d_orig, 
          margin = "row", bar.pos = "stack",
          show.summary = TRUE, coord.flip = TRUE)

tabz <- table(SSC$IEO_d, SSC$IEO_d_orig)
assocplot(tabz,
          xlab = "IEO_d", ylab = "IEO_d_orig")

mosaicplot(tabz,
           xlab = "IEO_d", ylab = "IEO_d_orig")

tm_shape(SSC) +
  tm_polygons(col = "IEO_d", n = 10, palette = "div",
              id = "SSC_NAME16", 
              popup.vars = c("SSC_NAME16", "IEO_d", "IEO"))

3 Dog cost

dog_cost <- read_xlsx("data-raw/costs/dog_expensive.xlsx") %>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  select(-web_source) %>% 
  rename(dog_breed = breed) %>%  
  select(dog_breed) %>% 
  distinct() %>% 
  # correcting names for better matching - these one used in BCE
  mutate(
    dog_breed = case_when(
      dog_breed == "Hairless Chinese Crested" ~ "Chinese Crested Dog",
      dog_breed == "Saint Bernard" ~ "St Bernard",
      TRUE ~ as.character(dog_breed))
  ) %>% 
  # synonyms
  add_row(dog_breed = "Dogue de Bordeaux") %>% 
  add_row(dog_breed = "Bulldog") %>% 
  add_row(dog_breed = "British Bulldog") %>% 
  mutate(expensive = "yes") %>% 
  arrange(dog_breed)

write_rds(dog_cost, "data/costs/dog_cost.Rds")

Top 20 most expensive dogs (+3 synonyms!)

dog_cost %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
dog_breed expensive
Akita yes
Azawakh yes
Black Russian Terrier yes
British Bulldog yes
Bulldog yes
Canadian Eskimo Dog yes
Chinese Crested Dog yes
Chow Chow yes
Dogue de Bordeaux yes
English Bulldog yes
French Bulldog yes
French Mastiff yes
German Shepherd yes
Irish Wolfhound yes
Lowchen yes
Maltese yes
Pharaoh Hound yes
Rottweiler yes
Saluki yes
Samoyed yes
St Bernard yes
Tibetan Mastiff yes
Yorkshire Terrier yes

4 Dog insurance

Data scraped from https://top10petinsurance.com.au/pet-insurance-prices on the 30th March 2020

# packages needed
# install.packages("rvest")

library(rvest)
library(tidyverse)

# scraping table done with this using chrome: https://www.r-bloggers.com/using-rvest-to-scrape-an-html-table/
url <- 'https://top10petinsurance.com.au/pet-insurance-prices/'   

dog_insurance <- url %>%
  xml2::read_html() %>%
  html_nodes(xpath='//*[@id="post-1016"]/div/table') %>%
  html_table()

dog_insurance <- dog_insurance[[1]]

head(dog_insurance)

write_rds(dog_insurance, "data-raw/costs/dog_insurance.Rds") # extracted on the 30 March 2020
dog_insurance <- read_rds("data-raw/costs/dog_insurance.Rds") %>% 
  as_tibble() %>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  # select(-) %>% 
  mutate(average_accident_policy_cost_annual = 
           gsub(",", "",
                average_accident_policy_cost_annual, 
                fixed = TRUE),
         average_illness_policy_cost_annual = 
           gsub(",", "",
                average_illness_policy_cost_annual, 
                fixed = TRUE),
         average_comprehensive_policy_cost_annual = 
           gsub(",", "",
                average_comprehensive_policy_cost_annual, 
                fixed = TRUE)
  ) %>% 
  mutate(average_accident_policy_cost_annual = 
           as.numeric(gsub("$", "",
                           average_accident_policy_cost_annual, 
                           fixed = TRUE)),
         average_illness_policy_cost_annual = 
           as.numeric(gsub("$", "",
                           average_illness_policy_cost_annual, 
                           fixed = TRUE)),
         average_comprehensive_policy_cost_annual = 
           as.numeric(gsub("$", "",
                           average_comprehensive_policy_cost_annual, 
                           fixed = TRUE))
  )

write_rds(dog_insurance, "data/costs/dog_insurance.Rds") 

4.1 Three major categories

dog_insurance %>% 
  frq(cost_compared_to_other_breeds) %>% 
  kableExtra::kable()
val label frq raw.prc valid.prc cum.prc
Above average <none> 58 10.39 10.39 10.39
Below average <none> 457 81.90 81.90 92.29
Significantly above average <none> 43 7.71 7.71 100.00
NA NA 0 0.00 NA NA

4.2 Individual breeds

Breeds in Above average and Significantly above average categories:

dog_insurance %>% 
  select(dog_breed, average_comprehensive_policy_cost_annual, cost_compared_to_other_breeds) %>% 
  filter(cost_compared_to_other_breeds != "Below average") %>% 
  arrange(desc(average_comprehensive_policy_cost_annual)) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
dog_breed average_comprehensive_policy_cost_annual cost_compared_to_other_breeds
Grand Basset Griffon Vendeen 1230 Significantly above average
Hygen Hound Cross 1221 Significantly above average
Formosan Mountain Dog 1221 Significantly above average
Italian Cane Corso 1221 Significantly above average
Australian Bulldog Miniature 1190 Significantly above average
Australian Bulldog Miniature Cross 1190 Significantly above average
Drever 1190 Significantly above average
French Bulldog 1189 Significantly above average
Yorkshire Terrier 1189 Significantly above average
Airedale Terrier 1189 Significantly above average
Basset Hound 1189 Significantly above average
Bullmastiff 1189 Significantly above average
Great Dane 1189 Significantly above average
Rottweiler 1189 Significantly above average
Weimaraner 1189 Significantly above average
Alaskan Malamute 1189 Significantly above average
American Akita 1189 Significantly above average
Australian Bulldog 1189 Significantly above average
Bedlington Terrier 1189 Significantly above average
Boxer 1189 Significantly above average
British Bulldog 1189 Significantly above average
Dogue De Bordeaux 1189 Significantly above average
Drever Cross 1189 Significantly above average
Hygen Hound 1189 Significantly above average
Irish Setter 1189 Significantly above average
Irish Wolfhound 1189 Significantly above average
Mastiff 1189 Significantly above average
Miniature Doberman 1189 Significantly above average
Miniature Pinscher 1189 Significantly above average
Pekingese 1189 Significantly above average
Poodle – Standard 1189 Significantly above average
Shar-Pei 1189 Significantly above average
St Bernard 1189 Significantly above average
Welsh Corgi – Cardigan 1189 Significantly above average
Welsh Corgi – Pembroke 1189 Significantly above average
Wire-Haired Terrier 1189 Significantly above average
Bernese Mountain Dog 1189 Significantly above average
Unknown Dog Breed 1168 Significantly above average
Akita Inu 1162 Significantly above average
Alaskan Klee Kai 1155 Significantly above average
Neopolitan Mastiff 1135 Significantly above average
Newfoundland 1135 Significantly above average
Bull Terrier 1110 Significantly above average
Corgi 945 Above average
Alaskan Malamute Cross 913 Above average
Irish Wolfhound Cross 913 Above average
Rottweiler Cross 913 Above average
Polish Lowland Sheepdog Cross 903 Above average
Corgi Cross 900 Above average
Welsh Corgi – Pembroke Cross 885 Above average
British Bulldog Cross 885 Above average
Weimaraner Cross 885 Above average
Miniature Pinscher Cross 885 Above average
Akita Inu Cross 885 Above average
American Bulldog 885 Above average
Australian Bulldog Cross 885 Above average
Bedlington Terrier Cross 885 Above average
Bernese Mountain Dog Cross 885 Above average
Boxer Cross 885 Above average
Chow Chow 885 Above average
Dobermann 885 Above average
Labrador Retriever 885 Above average
Mastiff Cross 885 Above average
Newfoundland Cross 885 Above average
Pekingese Cross 885 Above average
Poodle – Standard Cross 885 Above average
St Bernard Cross 885 Above average
Welsh Corgi – Cardigan Cross 885 Above average
Airedale Terrier Cross 885 Above average
American Akita Cross 885 Above average
American Cocker Spaniel 885 Above average
Basset Hound Cross 885 Above average
Bullmastiff Cross 885 Above average
Dachshund Cross 885 Above average
Doberman Pinscher 885 Above average
Dogue De Bordeaux Cross 885 Above average
English Pointer 885 Above average
French Bulldog Cross 885 Above average
German Short Haired Pointer Cross 885 Above average
Great Dane Cross 885 Above average
Irish Setter Cross 885 Above average
Labrador 885 Above average
Miniature Doberman Cross 885 Above average
Miniature Poodle 885 Above average
Old English Sheepdog 885 Above average
Papillon Cross 885 Above average
Polish Lowland Sheepdog 885 Above average
Rhodesian Ridgeback Cross 885 Above average
Samoyed 885 Above average
Shar-Pei Cross 885 Above average
Neopolitan Mastiff Cross 885 Above average
Wire-Haired Terrier Cross 885 Above average
Wolfhound Cross 885 Above average
Yorkshire Terrier Cross 885 Above average
Dalmatian 885 Above average
Bull Terrier Cross 869 Above average
King Charles Spaniel 869 Above average
Rhodesian Ridgeback 858 Above average
Wolfhound 846 Above average
Pinscher 836 Above average
Pinscher Cross 808 Above average

5 Dog ownership in Brisbane

dog_ownership <- read_csv("data-raw/permits/cars-bis-open-data-animal-permits-3-jan-2019.zip")

Raw dataset consists of 107,405 records.

5.1 Data selection

Excluding records with permit_name: Breeders Permit, Cattery Permit, Racehorses Permit, Pet Shop Permit, Domestic Dog Permit & Guard Dog Permit.

Excluding records without neighbourhood.

Excluding records without dog_breed values.

Excluding records with dog_breed listed as Unknown or Cross.

Stones Corner values were assigned to Greenslopes SSC (see information in section above).

dog_ownership %<>% 
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>% 
  # all the same here
  select(-permit_group, -permit_status) %>% 
  # special permits?
  filter(!permit_name %in% c("Breeders Permit", "Cattery Permit", "Racehorses Permit", "Pet Shop Permit")) %>% 
  filter(!permit_name %in% c("Domestic Dog Permit", "Guard Dog Permit")) %>%
  rename(dog_breed = animal_breed,
         SSC_NAME16 = application_location_suburb) %>% 
  mutate(SSC_NAME16 = str_to_title(SSC_NAME16)) %>% 
  # correct suburb
  mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "Stones Corner", "Greenslopes", SSC_NAME16)) %>% 
  # missing geo
  filter(!is.na(SSC_NAME16)) %>% 
  # missing breed
  filter(!is.na(dog_breed)) %>% 
  filter(!dog_breed %in% c("Unknown", 
                           "Medium Cross Breed", "Small Cross Breed", "Large Cross Breed")) %>% 
  # few cleans for better matches
  mutate(
    dog_breed = case_when(
      dog_breed == "German Shepherd Dog (Long Stock Coat)" ~ "German Shepherd",
      dog_breed == "German Shepherd Dog" ~ "German Shepherd",
      dog_breed == "Central Asian Shepherd Dog " ~ "Central Asian Shepherd",
      dog_breed == "Kangal Dog" ~ "Kangal",
      dog_breed == "Bulldog" ~ "British Bulldog",
      dog_breed == "Collie (Rough)" ~ "Rough Collie", 
      dog_breed == "Collie (Smooth)" ~ "Smooth Collie",      
      
      TRUE ~ as.character(dog_breed))
  ) 

write_rds(dog_ownership, "data/permits/dog_ownership.Rds") 

106,018 records after exclusions.

5.2 Data merging - dog_cost

dog_ownership_cost <- left_join(dog_ownership, dog_cost, by = "dog_breed")

# binary indicator "expensive" and "non-expensive" dog breeds according to dog_cost
dog_ownership_cost %<>% 
  mutate(
    expensive = case_when(
      is.na(expensive) ~ 0,
      expensive == "yes" ~ 1
    )
  )

frq(dog_ownership_cost, expensive)
## 
## expensive <numeric>
## # total N=106018  valid N=106018  mean=0.14  sd=0.35
## 
## Value |     N | Raw % | Valid % | Cum. %
## ----------------------------------------
##     0 | 90940 | 85.78 |   85.78 |  85.78
##     1 | 15078 | 14.22 |   14.22 | 100.00
##  <NA> |     0 |  0.00 |    <NA> |   <NA>

5.3 Data merging - dog_insurance

Correcting names for better match

dog_insurance %<>%
  mutate(
    dog_breed = case_when(
      # this is a bit tricky! might need sensitivity?
      dog_breed == "French Poodle" ~ "Poodle",  
      
      # just naming issues
      dog_breed == "Poodle – Standard" ~ "Poodle (Standard)",
      dog_breed == "Miniature Poodle" ~ "Poodle (Miniature)",
      dog_breed == "Poodle – Toy" ~ "Poodle (Toy)",
      dog_breed == "Shar-Pei" ~ "Shar Pei", 
      dog_breed == "German Short Haired Pointer" ~ "German Shorthaired Pointer", 
      dog_breed == "German Wire Haired Pointer" ~ "German Wirehaired Pointer", 
      dog_breed == "Collie – Rough" ~ "Rough Collie", 
      dog_breed == "Collie – Smooth" ~ "Smooth Collie", 
      dog_breed == "Miniature Schnauzer" ~ "Schnauzer (Miniature)", 
      dog_breed == "Schnauzer Giant" ~ "Schnauzer (Giant)", 
      dog_breed == "Lagotto Rom" ~ "Lagotto Romagnolo", 
      dog_breed == "Brittany Spaniel" ~ "Brittany", 
      dog_breed == "Staghound" ~ "Stag Hound", 
      dog_breed == "Kerry Blue" ~ "Kerry Blue Terrier", 
      dog_breed == "English Toy terrier" ~ "English Toy Terrier",
      dog_breed == "Parson Jack Russell Terrier" ~ "Parson Russell Terrier", 
      dog_breed == "Welsh Corgi – Pembroke" ~ "Welsh Corgi (Pembroke)", 
      dog_breed == "American Cocker Spaniel" ~ "Cocker Spaniel (American)",
      dog_breed == "Basset Fauve De Bretagne" ~ "Basset Fauve de Bretagne", 
      dog_breed == "Norwegian Elk Hound" ~ "Norwegian Elkhound", 
      dog_breed == "Cheasapeake Bay Retriever" ~ "Chesapeake Bay Retriever", 
      dog_breed == "Bouvier Des Flandres" ~ "Bouvier des Flandres", 
      dog_breed == "Miniature Bull Terrier" ~ "Bull Terrier (Miniature)", 
      dog_breed == "Munsterlander – Large" ~ "Large Munsterlander", 
      dog_breed == "Welsh Corgi – Cardigan" ~ "Welsh Corgi (Cardigan)", 
      dog_breed == "HamiltonStovare" ~ "Hamiltonstovare",
      dog_breed == "Blue tick Coonhound" ~ "Bluetick Coonhound",
      dog_breed == "Japanese Akita" ~ "Akita (Japanese)",
      dog_breed == "Dogue De Bordeaux" ~ "Dogue de Bordeaux",
      dog_breed == "Italian Cane Corso" ~ "Italian Corso Dog",
      
      # different kelpies but same category anyway
      dog_breed == "Australian Kelpie Sheepdog" ~ "Australian Kelpie", 
      
      # typo
      dog_breed == "Neopolitan Mastiff" ~ "Neapolitan Mastiff", 
      TRUE ~ as.character(dog_breed)))

Dog breeds without match

dog_ownership_cost %>% anti_join(
  dog_insurance %>% select(dog_breed, cost_compared_to_other_breeds)
) %>%
  group_by(dog_breed) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
dog_breed n
Fox Terrier 3302
Schnauzer 1591
Welsh Corgi 376
Pointer 128
Akita 55
Chihuahua (Smooth Coat) 33
Chihuahua (Long Coat) 30
Fox Terrier (Smooth) 20
Australian Stumpy Tail Cattle Dog 17
Foxhound 13
White Swiss Shepherd Dog 7
Fox Terrier (Wire) 6
Glen of Imaal Terrier 4
Canadian Eskimo Dog 3
Canaan Dog 2
Central Asian Shepherd Dog 2
Australian Staghound 1
Eurasier 1
Portuguese Podengo 1
Swedish Lapphund 1

Some further corrections still possible here:

dog_ownership_cost <-  left_join(dog_ownership_cost, 
                                 dog_insurance %>% 
                                   select(dog_breed, cost_compared_to_other_breeds)
) %>% 
  mutate(
    cost_compared_to_other_breeds = case_when(
      # all the same
      dog_breed == "Fox Terrier" ~ "Below average",
      dog_breed == "Fox Terrier (Smooth)" ~ "Below average",
      dog_breed == "Fox Terrier (Wire)" ~ "Below average",
      dog_breed == "Schnauzer" ~ "Below average",
      dog_breed == "Australian Stumpy Tail Cattle Dog" ~ "Below average",
      dog_breed == "Foxhound" ~ "Below average",
      dog_breed == "White Swiss Shepherd Dog" ~ "Below average",
      
      # multiple options here, but all above going for conservative
      dog_breed == "Welsh Corgi" ~ "Above average",
      
      # taking values from Chihuahua
      dog_breed == "Chihuahua (Smooth Coat)" ~ "Below average",
      dog_breed == "Chihuahua (Long Coat)" ~ "Below average",
      dog_breed == "Chihuahua (Long Coat)" ~ "Below average",
      
      # naming - Dogue De Bordeaux
      dog_breed == "French Mastiff" ~ "Significantly above average",
      TRUE ~ as.character(cost_compared_to_other_breeds)))

Few things left:

dog_ownership_cost %>% 
  filter(is.na(cost_compared_to_other_breeds)) %>%
  group_by(dog_breed) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>% 
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
dog_breed n
Pointer 128
Akita 55
Glen of Imaal Terrier 4
Canadian Eskimo Dog 3
Canaan Dog 2
Central Asian Shepherd Dog 2
Australian Staghound 1
Eurasier 1
Portuguese Podengo 1
Swedish Lapphund 1

Two largest groups:

Pointer - insufficient info! Akita - insufficient info! Could be Inu, could be Japanese

These observations will remain as NAs.

write_rds(dog_ownership_cost, "data/permits/dog_ownership_cost.Rds")